********************************************************************************
** Communicating doctors' consensus persistently increases Covid-19 vaccinations
** Data cleaning file
********************************************************************************

* Open the log for this cleaning run and load the raw panel data.
log using "$logpath/01_data_clean", replace

use "$rawdatapath/communicating_consensus.dta", clear

*****************************************************************
*** VARIABLE DEFINITIONS VACCINATION EXPERIMENT
*****************************************************************

* Raw distributions of the three survey items the outcomes are built from.
foreach item of varlist nQ275_r1 nQ276_r1 nQ277_r1 {
	tabulate `item'
}

**** Outcome definitions
* vaccinated: nQ275_r1 coded 1, 2 or 4 (code 4, the third dose, was added in wave 36).
generate vaccinated=inlist(nQ275_r1,1,2,4)

* registered: nQ276_r1 coded 1, 2 or 3 (original note: this includes "Registrace Ne + Termin ANO").
* Anyone already vaccinated also counts as registered (PAP).
generate registered=inlist(nQ276_r1,1,2,3)
replace registered=1 if vaccinated==1

* vaccinate_intention: nQ277_r1 coded 1 or 2, plus everyone who is vaccinated or registered.
generate vaccinate_intention=inlist(nQ277_r1,1,2)
replace vaccinate_intention=1 if vaccinated==1 | registered==1

* Cross-checks of the constructed outcomes against each other and the raw intention item.
tabulate nQ277_r1 vaccinate_intention
tabulate vaccinate_intention vaccinated
tabulate vaccinate_intention registered
tabulate nQ277_r1 vaccinated
tabulate nQ277_r1 registered
* (disabled by the original author) replace vaccinate_intention=0 if  nQ277_r1==3 | nQ277_r1==4 | nQ277_r1==5

* Wave-36 rows flagged as obfuscation-survey rows that carry no main-survey answer
* (nQ275_r1 missing) get missing outcomes instead of zeros.
foreach outcome of varlist vaccinated registered vaccinate_intention {
	replace `outcome'=. if vlna==36 & obfuscated==1 & nQ275_r1==.
}


*** Wave participation
* vlnaW marks the rows belonging to wave W; vlnaW_part copies that mark to every row
* of the same respondent (1 if the respondent has a row in wave W).
foreach w of numlist 13 18 21/36 {
	generate vlna`w'=vlna==`w'
	bysort respondentId: egen vlna`w'_part=max(vlna`w')
}

tabulate vlna24_part if vlna25==1

* Wave 36 is rebuilt: a wave-36 row without a main-survey answer (nQ275_r1 missing) does not
* count as participation. Original note: about 50 individuals took the obfuscation survey but
* not the main experiment, and they are not counted as wave-36 participants.
generate vlna36_part_h=cond(nQ275_r1==., 0, vlna36_part) if vlna==36
drop vlna36_part
bysort respondentId: egen vlna36_part=max(vlna36_part_h)
replace vlna36_part=0 if missing(vlna36_part)
drop vlna36_part_h

*** Obfuscated survey participation
* Respondent-level flag; respondents with no non-missing obfuscated value are set to 0.
bysort respondentId: egen obfuscated_part=max(obfuscated)
replace obfuscated_part=0 if missing(obfuscated_part)

*** Student certification data
* vac_cert_y: respondent-level flag built from the wave-36 row, equal to 1 when nQ386_r1 or
* nQ399_r1 is coded 1, 2 or 3 (original note: did the respondent provide a vaccination certificate?).
generate vac_cert_y_h=inlist(nQ386_r1,1,2,3) | inlist(nQ399_r1,1,2,3)
* Only wave-36 rows count. Rows of obfuscation-survey respondents who are not wave-36
* participants (about 50 cases per the original note) are blanked as well.
replace vac_cert_y_h=. if vlna!=36 | (vlna36_part==0 & obfuscated==1)
bysort respondentId: egen vac_cert_y=max(vac_cert_y_h)
drop vac_cert_y_h

* vac_no_cert: reported being vaccinated in wave 36 but provided no certificate.
* vaccinated is compared with 1 explicitly: a bare "vaccinated & ..." would count a missing
* value as true, because Stata evaluates every nonzero value, including missing, as true.
generate vac_no_cert_h=vaccinated==1 & vac_cert_y==0 & vlna==36
replace vac_no_cert_h=. if vlna!=36 | (vlna36_part==0 & obfuscated==1)
bysort respondentId: egen vac_no_cert=max(vac_no_cert_h)
drop vac_no_cert_h

*** Vaccinated dummy for respondents who provided any certificate information (non-verified)
*** A) respondents without wave-36 certificate information (vac_cert_y missing) count as NOT vaccinated
generate vaccinated_anycert=vaccinated if vac_cert_y==1
* wave-36 respondents who did not provide a certificate are taken as unvaccinated
replace vaccinated_anycert=0 if vac_cert_y==0
replace vaccinated_anycert=0 if vac_cert_y==.
* rows without a self-reported status stay missing
replace vaccinated_anycert=. if vaccinated==.

*** B) respondents without wave-36 certificate information (vac_cert_y missing) count as vaccinated
generate vaccinated_anycert2=vaccinated if vac_cert_y==1
* wave-36 respondents who did not provide a certificate are taken as unvaccinated
replace vaccinated_anycert2=0 if vac_cert_y==0
replace vaccinated_anycert2=1 if vac_cert_y==.
* rows without a self-reported status stay missing
replace vaccinated_anycert2=. if vaccinated==.


** Add data from student coding of certificate data
* Convert the Excel coding sheet to .dta without disturbing the panel currently in memory.
preserve
import excel "$rawdatapath/2007_w36certificatecoding_v01.xlsx", firstrow clear 
save "$rawdatapath/2007_w36certificatecoding_v01.dta", replace
restore

* keep(master match): respondents that appear only in the coding sheet are not added to the
* panel. Such rows would have no wave information, and because their vlna25_part is missing
* (not 0) and their treatment helper evaluates to 0, they would survive the later sample
* restrictions and be counted as control observations.
merge m:1 respondentId using "$rawdatapath/2007_w36certificatecoding_v01.dta", keep(master match)
rename _merge _merge_certificatecoding

* Respondents without coded certificate data count as not certified, so the four student
* codes are defined for the full sample.
foreach code of varlist prophylaxis_M vaccine_M prophylaxis_T vaccine_T {
	replace `code'=0 if _merge_certificatecoding==1
}

** Vaccination certificate data verified (by students)
* Confirmed when the four student codes sum to at least 2 (original note: either one student
* was certain and gave 1 for both items, or the students agreed in at least one instance).
* rowtotal() treats a missing code as 0. A plain sum would be missing whenever any single code
* is missing, and since Stata ranks missing above every number, "missing >= 2" would wrongly
* mark the certificate as confirmed.
egen certificate_codes_sum=rowtotal(prophylaxis_M vaccine_M prophylaxis_T vaccine_T)
generate certificate_confirmed=certificate_codes_sum>=2
drop certificate_codes_sum

*** Vaccinated dummy for respondents with verified certificates (by students)
*** Everyone without a confirmed certificate counts as NOT vaccinated;
*** rows without a self-reported vaccination status stay missing.
generate vaccinated_certcode=cond(certificate_confirmed==1, vaccinated, 0) if vaccinated!=.

*** Vaccinated dummy for respondents with same vaccination status in panel and obfuscated survey
* Fairly conservative: a respondent is confirmed only when the wave-36 row has both the regular
* and the obfuscated status and the two agree. Some people are in the obfuscated survey but not
* in wave 36, and some are not in the obfuscated survey at all (original note: correlation
* between regular and obfuscated answers is 0.965).
generate obfuscation_confirmed_h=vlna==36 & !missing(vaccinated, vaccinated_obfuscated) & vaccinated==vaccinated_obfuscated
bysort respondentId: egen obfuscation_confirmed=max(obfuscation_confirmed_h)
drop obfuscation_confirmed_h

*** Everyone whose status is not confirmed by the obfuscated survey counts as NOT vaccinated;
*** rows without a self-reported vaccination status stay missing.
generate vaccinated_obfconfirmed=cond(obfuscation_confirmed==1, vaccinated, 0) if vaccinated!=.

*** Second and third dose dummies
* at least two doses in the main panel
generate vaccinated_dose2=inlist(nQ275_r1,2,4) | nQ346_r1==6
* at least three doses in the main panel
generate vaccinated_dose3=nQ275_r1==4 | nQ346_r1==6
* intends to get a third dose, has a date booked, or already had the shot
generate vaccinate_intention_dose3=inlist(nQ346_r1,1,2,6,7) | nQ275_r1==4

* Wave-36 obfuscation rows without a main-survey answer get missing values instead of zeros.
foreach dose_var of varlist vaccinated_dose2 vaccinated_dose3 vaccinate_intention_dose3 {
	replace `dose_var'=. if vlna==36 & obfuscated==1 & nQ275_r1==.
}



*** Information experiment treatment dummy 
tab nQ302

* Treatment status (nQ302==1) is read from the wave-25 row only, and not at all for respondents
* who reported working in media in wave 25 (nQ297_r==1), who were not part of the experiment.
* The helper is left missing on every other row. If it were 0 on non-wave-25 rows, the
* respondent-level max() below would turn an excluded media worker with any other wave into a
* control observation (max of missing and 0 is 0) instead of leaving info_exp missing.
gen info_exp_temp=nQ302==1 if vlna==25 & nQ297_r!=1
egen info_exp=max(info_exp_temp), by(respondentId)
label define info_exp 0 "C" 1 "T"
label values info_exp info_exp
tab info_exp 
*Sample = only those who participated in wave 25 and were part of the experiment.
drop if vlna25_part==0
drop if info_exp==.
tab info_exp 
tab info_exp if vlna==25

*** Information demand dummy
* 1 if nQ298_r1==1 on the respondent's wave-25 row, 0 otherwise; copied to all rows.
gen info_demand_temp=nQ298_r1==1 & vlna==25
egen info_demand=max(info_demand_temp), by(respondentId)

*** Beliefs
* Beliefs: Doctor's vaccination intentions (nQ300_1_1, taken from the wave-25 row)
* Overestimation dummy: belief of 90 or more (original note: an exact match also counts as
* overestimating). It is defined only where the wave-25 belief is non-missing: Stata ranks
* missing above every number, so an unguarded ">=90" would classify non-responses as
* overestimators. Respondents without a wave-25 belief get a missing dummy.
gen doctor_vaccine_takeup_overest_t=nQ300_1_1>=90 if vlna==25 & !missing(nQ300_1_1)
egen doctor_vaccine_takeup_overest=max(doctor_vaccine_takeup_overest_t), by(respondentId)
tab doctor_vaccine_takeup_overest // original note: 6.4 % overestimating doctors' take-up

* Wave-25 belief copied to all rows of the respondent
gen beliefs_takeup_W25_t=nQ300_1_1 if vlna==25
egen beliefs_takeup_W25=max(beliefs_takeup_W25_t), by(respondentId)
tab beliefs_takeup_W25


* Median split among those underestimating (belief below 90):
sum nQ300_1_1 if nQ300_1_1<90 & vlna==25, d
tab nQ300_1_1 if nQ300_1_1<90 & vlna==25 // original note: median split among underestimators is (0-55) vs (56-89)
* 0 = belief of 55 or less, 1 = belief of 56 to 89; missing for beliefs of 90 or more and for non-responses
gen doctor_takeup_underest_more_t=0 if nQ300_1_1<=55 & vlna==25
replace doctor_takeup_underest_more_t=1 if nQ300_1_1>55 & nQ300_1_1<90 & vlna==25
tab nQ300_1_1 doctor_takeup_underest_more_t
egen doctor_takeup_underest_more=max(doctor_takeup_underest_more_t), by(respondentId)
tab nQ300_1_1 doctor_takeup_underest_more if vlna==25
tab doctor_takeup_underest_more

* Beliefs: Doctor's trust (nQ301_1_1, taken from the wave-25 row)
* Overestimation dummy: belief of 89 or more (original note: an exact match also counts as
* overestimating). Same guard against missing beliefs as for take-up above.
gen doctor_vaccine_trust_overest_t=nQ301_1_1>=89 if vlna==25 & !missing(nQ301_1_1)
egen doctor_vaccine_trust_overest=max(doctor_vaccine_trust_overest_t), by(respondentId)
tab doctor_vaccine_trust_overest // original note: 11.7 % overestimating doctors' trust

* Wave-25 belief copied to all rows of the respondent
gen beliefs_trust_W25_t=nQ301_1_1 if vlna==25
egen beliefs_trust_W25=max(beliefs_trust_W25_t), by(respondentId)
tab beliefs_trust_W25

* Median split among those underestimating (belief below 89):
sum nQ301_1_1 if nQ301_1_1<89 & vlna==25, d
tab nQ301_1_1 if nQ301_1_1<89 & vlna==25 // original note: median split among underestimators is (0-57) vs (58-88)
* 0 = belief of 57 or less, 1 = belief of 58 to 88; missing for beliefs of 89 or more and for non-responses
gen doctor_trust_underest_more_t=0 if nQ301_1_1<=57 & vlna==25
replace doctor_trust_underest_more_t=1 if nQ301_1_1>57 & nQ301_1_1<89 &  vlna==25
tab nQ301_1_1 doctor_trust_underest_more_t
egen doctor_trust_underest_more=max(doctor_trust_underest_more_t), by(respondentId)
tab nQ301_1_1 doctor_trust_underest_more if vlna==25
tab doctor_trust_underest_more 

* Remove row-level helpers; the respondent-level versions are kept.
drop info_exp_temp info_demand_temp doctor_vaccine_takeup_overest_t doctor_takeup_underest_more_t doctor_vaccine_trust_overest_t doctor_trust_underest_more_t

	
******Control variables

*Number of children
* The largest value reported on any row is copied to all rows of the respondent,
* and the new variable takes the position of the old one.
rename children children_t
bysort respondentId: egen children=max(children_t)
order children, after(children_t)
drop children_t
label variable children "Number of children"
* Original note: not available for everyone (1958/2101 in info_exp).
tabulate children if vlna25==1
* Missing values are dummied out: flag them, then recode the variable itself to 0.
generate children_missing=(children==.)
replace children=0 if children_missing==1
tabulate children children_missing if vlna25==1

*** Vaccinated in wave 25
* nQ275_r1 coded 1 or 2 on the wave-25 row, copied to all rows of the respondent.
generate vaccinated25_t=inlist(nQ275_r1,1,2) if vlna==25
bysort respondentId: egen vaccinated25=max(vaccinated25_t)
replace vaccinated25=. if vlna25_part==0
drop vaccinated25_t

*** Beliefs in wave 25
* Wave-25 answers to nQ300_1_1 (take-up) and nQ301_1_1 (trust), copied to all rows.
bysort respondentId: egen belief_takeup25=max(cond(vlna==25, nQ300_1_1, .))

bysort respondentId: egen belief_trust25=max(cond(vlna==25, nQ301_1_1, .))

***Past vaccination intentions
* Taken from wave 24 where available, otherwise from the closest earlier wave.
* Counts as yes: vaccinated OR registered / has an appointment OR intends to (definitely or rather yes).
* Note: in waves 13, 18 and 21 the question was only hypothetical; vaccinations started in January.
* Note: respondents registered without an appointment, or with an appointment but no registration,
* were still asked the intentions question, which can produce a few seemingly odd cases of people
* registered but not intending to go.
foreach w of numlist 13 18 21/24 {
	generate vaccineyes`w'_t=(inlist(nQ200_r1,1,2) | inlist(nQ277_r1,1,2) | inlist(nQ276_r1,1,2,3) | inlist(nQ275_r1,1,2)) if vlna==`w'
	bysort respondentId: egen vaccineyes`w'=max(vaccineyes`w'_t)
	replace vaccineyes`w'=. if vlna`w'_part==0
	drop vaccineyes`w'_t
}

* Walk backwards from wave 24; a value is only filled in while vaccineyesprior is still missing,
* so the most recent available wave wins.
generate vaccineyesprior=.
foreach w of numlist 24 23 22 21 18 13 {
	replace vaccineyesprior=vaccineyes`w' if vaccineyesprior==. & vlna`w'_part==1 & inlist(vaccineyes`w',0,1)
	if `w'==24 {
		* coverage from wave 24 alone, before falling back to earlier waves
		tabulate vaccineyesprior if vlna==25
		sort respondentId vlna
	}
}
tabulate vaccineyesprior if vlna==25
tabulate respondentId if vaccineyesprior==.
* Original note: 14 people took part in the experiment in wave 25 without any earlier
* intentions on record (no participation in waves 13, 18, 21, 22, 23 or 24).
* They are dummied out: flagged as missing and recoded to 0.
generate vaccineyesprior_missing=(vaccineyesprior==.)
replace vaccineyesprior=0 if vaccineyesprior_missing==1
tabulate vaccineyesprior vaccineyesprior_missing if vlna25==1


*balance tests:
* Prior intentions are constant within respondent, so both tests use the wave-25 rows (the
* experimental sample). The chi2 test previously ran on wave-24 rows, i.e. on a different
* sample than the t-test it accompanies.
ttest vaccineyesprior if vlna==25, by(info_exp)
tab vaccineyesprior info_exp if vlna==25, chi2 exact
* Vaccination status is compared on the wave-24 rows.
ttest vaccinated if vlna==24, by(info_exp)
tab vaccinated info_exp if vlna==24, chi2 exact


* Variable defining whether participant participated across all waves 24 through 36
* (original note: 1212 of 2101 individuals)
gen allwaves36=1
forvalues w=24/36 {
	replace allwaves36=0 if vlna`w'_part!=1
}


*overlap of over- and underestimators
tab doctor_vaccine_trust_overest doctor_vaccine_takeup_overest if vlna==25

**************************************
********fix controls at wave 25

*******female
	tabulate female
	tabulate female if vlna==25
	* within-respondent standard deviation; the original run found no variation
	egen female_sd=sd(female), by(respondentId)
	summarize female_sd
	* some rows are missing, so the wave-25 value is copied to every row of the respondent
	summarize female if vlna>=24
	egen female_h=min(cond(vlna==25, female, .)), by(respondentId)
	drop female female_sd
	rename female_h female
	* re-check: should show no within-respondent variation
	egen female_sd=sd(female), by(respondentId)
	summarize female_sd
	drop female_sd
	tabulate female if vlna>=24
	tabulate female if vlna==25
	
********age_cat
	tabulate age_cat if vlna>=24
	tabulate age_cat if vlna==25
	* within-respondent standard deviation of the category and of each existing dummy
	egen age_cat_sd=sd(age_cat), by(respondentId)
	summarize age_cat_sd
	forvalues k=1/6 {
		egen d_age_cat`k'_sd=sd(d_age_cat`k'), by(respondentId)
		summarize d_age_cat`k'_sd
	}
	* some variation: the old dummies are discarded and the wave-25 value is copied to every row
	forvalues k=1/6 {
		drop d_age_cat`k'
	}
	egen age_cat_h=min(cond(vlna==25, age_cat, .)), by(respondentId)
	drop age_cat age_cat_sd
	rename age_cat_h age_cat
	* re-check: should show no within-respondent variation
	egen age_cat_sd=sd(age_cat), by(respondentId)
	summarize age_cat_sd
	drop age_cat_sd
	tabulate age_cat if vlna>=24
	tabulate age_cat if vlna==25

	* rebuild the category dummies from the fixed variable
	tabulate age_cat, generate(d_age_cat)
	label variable d_age_cat1 "Age 18-24 (d)"
	label variable d_age_cat2 "Age 25-34 (d)"
	label variable d_age_cat3 "Age 35-44 (d)"
	label variable d_age_cat4 "Age 45-54 (d)"
	label variable d_age_cat5 "Age 55-64 (d)"
	label variable d_age_cat6 "Age 65+ (d)"
	foreach dummy of varlist d_age_cat* {
		label values `dummy' yes_no
	}
		
*****hsize
	tabulate hsize if vlna>=25
	tabulate hsize if vlna==25
	* within-respondent standard deviation; the original run found some variation
	egen hsize_sd=sd(hsize), by(respondentId)
	summarize hsize_sd
	* the wave-25 value is copied to every row of the respondent
	egen hsize_h=min(cond(vlna==25, hsize, .)), by(respondentId)
	drop hsize hsize_sd
	rename hsize_h hsize
	* re-check: should show no within-respondent variation
	egen hsize_sd=sd(hsize), by(respondentId)
	summarize hsize_sd
	drop hsize_sd
	tabulate hsize if vlna>=25
	tabulate hsize if vlna==25

*******children
	* check only: the original run found no within-respondent variation
	tabulate children if vlna>=25
	tabulate children if vlna==25
	egen children_sd=sd(children), by(respondentId)
	summarize children_sd
	drop children_sd

******region
	tabulate region if vlna>=25
	tabulate region if vlna==25
	* within-respondent standard deviation; the original run found no variation but some missing values
	egen region_sd=sd(region), by(respondentId)
	summarize region_sd

	* the old dummies are discarded and the wave-25 value is copied to every row
	forvalues k=1/14 {
		drop d_region`k'
	}
	egen region_h=min(cond(vlna==25, region, .)), by(respondentId)
	drop region region_sd
	rename region_h region
	* re-check: should show no within-respondent variation
	egen region_sd=sd(region), by(respondentId)
	summarize region_sd
	drop region_sd
	tabulate region if vlna>=25
	tabulate region if vlna==25

	* rebuild the region dummies from the fixed variable
	quietly tabulate region, generate(d_region)
	label variable d_region1 "Region | Prague (d)"
	label variable d_region2 "Region | Central Bohemia (d)"
	label variable d_region3 "Region | South Bohemia (d)"
	label variable d_region4 "Region | Plzeň (d)"
	label variable d_region5 "Region | Karlovy Vary (d)"
	label variable d_region6 "Region | Ústí (d)"
	label variable d_region7 "Region | Liberec (d)"
	label variable d_region8 "Region | Hradec Králové (d)"
	label variable d_region9 "Region | Pardubice (d)"
	label variable d_region10 "Region | Vysočina (d)"
	label variable d_region11 "Region | South Moravia (d)"
	label variable d_region12 "Region | Olomouc (d)"
	label variable d_region13 "Region | Zlín (d)"
	label variable d_region14 "Region | Moravia-Silesia (d)"
	foreach dummy of varlist d_region* {
		label values `dummy' yes_no
	}
	
		
******townsize
	tabulate townsize if vlna>=25
	tabulate townsize if vlna==25
	* within-respondent standard deviation; the original run found no variation but some missing values
	egen townsize_sd=sd(townsize), by(respondentId)
	summarize townsize_sd

	* the old dummies are discarded and the wave-25 value is copied to every row
	forvalues k=1/7 {
		drop d_townsize`k'
	}
	egen townsize_h=min(cond(vlna==25, townsize, .)), by(respondentId)
	drop townsize townsize_sd
	rename townsize_h townsize
	* re-check: should show no within-respondent variation
	egen townsize_sd=sd(townsize), by(respondentId)
	summarize townsize_sd
	drop townsize_sd
	tabulate townsize if vlna>=25
	tabulate townsize if vlna==25

	* rebuild the town-size dummies from the fixed variable
	tabulate townsize, generate(d_townsize)
	label variable d_townsize1 "Town size | Below 999 (d)"
	label variable d_townsize2 "Town size | 1,000-1,999 (d)"
	label variable d_townsize3 "Town size | 2,000-4,999 (d)"
	label variable d_townsize4 "Town size | 5,000-19,999 (d)"
	label variable d_townsize5 "Town size | 20,000-49,999 (d)"
	label variable d_townsize6 "Town size | 50,000-99,999 (d)"
	label variable d_townsize7 "Town size | Above 100,000 (d)"
	foreach dummy of varlist d_townsize* {
		label values `dummy' yes_no
	}

******education
	tabulate educ if vlna>=25
	tabulate educ if vlna==25
	* within-respondent standard deviation of the category and of each existing dummy
	egen educ_sd=sd(educ), by(respondentId)
	summarize educ_sd
	forvalues k=1/4 {
		egen d_educ`k'_sd=sd(d_educ`k'), by(respondentId)
		summarize d_educ`k'_sd
	}
	* some variation: the old dummies are discarded and the wave-25 value is copied to every row
	forvalues k=1/4 {
		drop d_educ`k'
	}
	egen educ_h=min(cond(vlna==25, educ, .)), by(respondentId)
	drop educ educ_sd
	rename educ_h educ
	* re-check: should show no within-respondent variation
	egen educ_sd=sd(educ), by(respondentId)
	summarize educ_sd
	drop educ_sd
	tabulate educ
	tabulate educ if vlna==25

	* rebuild the education dummies from the fixed variable
	tabulate educ, generate(d_educ)
	label variable d_educ1 "Education | elementary (d)"
	label variable d_educ2 "Education | high school no degree (d)"
	label variable d_educ3 "Education | high school degree (d)"
	label variable d_educ4 "Education | university (d)"
	foreach dummy of varlist d_educ* {
		label values `dummy' yes_no
	}

******estat
	tabulate estat if vlna>=25
	tabulate estat if vlna==25
	* within-respondent standard deviation of the category and of each existing dummy
	egen estat_sd=sd(estat), by(respondentId)
	summarize estat_sd
	forvalues k=1/7 {
		egen d_estat`k'_sd=sd(d_estat`k'), by(respondentId)
		summarize d_estat`k'_sd
	}
	* some variation and some missing values: the old dummies are discarded and the
	* wave-25 value is copied to every row
	forvalues k=1/7 {
		drop d_estat`k'
	}
	egen estat_h=min(cond(vlna==25, estat, .)), by(respondentId)
	drop estat estat_sd
	rename estat_h estat
	* re-check: should show no within-respondent variation
	egen estat_sd=sd(estat), by(respondentId)
	summarize estat_sd
	drop estat_sd
	tabulate estat if vlna>=25
	tabulate estat if vlna==25

	* rebuild the economic-status dummies from the fixed variable
	tabulate estat, generate(d_estat)
	label variable d_estat1 "Economic status | Employee (d)"
	label variable d_estat2 "Economic status | Entrepreneur (d)"
	label variable d_estat3 "Economic status | Student (d)"
	label variable d_estat4 "Economic status | Parental leave (d)"
	label variable d_estat5 "Economic status | Retired (d)"
	label variable d_estat6 "Economic status | Unemployed (d)"
	label variable d_estat7 "Economic status | Other (d)"
	foreach dummy of varlist d_estat* {
		label values `dummy' yes_no
	}
		
******Income
* Current household income is used instead of the CNP income from January 2020.
	tabulate HHincome

	tabulate HHincome if vlna>=25
	tabulate HHincome if vlna==25
	* within-respondent standard deviation; the original run found some variation
	egen HHincome_sd=sd(HHincome), by(respondentId)
	summarize HHincome_sd

	* the wave-25 value is copied to every row of the respondent
	egen HHincome_h=min(cond(vlna==25, HHincome, .)), by(respondentId)
	drop HHincome HHincome_sd
	rename HHincome_h HHincome
	* re-check: should show no within-respondent variation
	egen HHincome_sd=sd(HHincome), by(respondentId)
	summarize HHincome_sd
	drop HHincome_sd
	tabulate HHincome if vlna>=25
	tabulate HHincome if vlna==25

	* build the income-bracket dummies from the fixed variable
	tabulate HHincome, generate(d_HHincome)
	label variable d_HHincome1 "Household income | Up to 10,000 CZK (d)"
	label variable d_HHincome2 "Household income | 10,001 - 15,000 CZK (d)"
	label variable d_HHincome3 "Household income | 15,001 - 20,000 CZK (d)"
	label variable d_HHincome4 "Household income | 20,001 - 25,000 CZK (d)"
	label variable d_HHincome5 "Household income | 25,001 - 30,000 CZK (d)"
	label variable d_HHincome6 "Household income | 30,001 - 35,000 CZK (d)"
	label variable d_HHincome7 "Household income | 35,001 - 40,000 CZK (d)"
	label variable d_HHincome8 "Household income | 40,001 - 50,000 CZK (d)"
	label variable d_HHincome9 "Household income | 50,001 - 60,000 CZK (d)"
	label variable d_HHincome10 "Household income | Over 60,000 CZK (d)"
	label variable d_HHincome11 "Household income | I don't know (d)"
	foreach dummy of varlist d_hincome_above_median d_HHincome* {
		label values `dummy' yes_no
	}

* Drop the _sd helper variables created for the within-respondent variation checks
forvalues k=1/6 {
	drop d_age_cat`k'_sd
}
forvalues k=1/4 {
	drop d_educ`k'_sd
}
forvalues k=1/7 {
	drop d_estat`k'_sd
}
		
**************************************
****Check variables

* Build the control list once. The first category of every dummy set is left out.
local controls female
forvalues k=2/6 {
	local controls `controls' d_age_cat`k'
}
local controls `controls' hsize children children_missing
forvalues k=2/14 {
	local controls `controls' d_region`k'
}
forvalues k=2/7 {
	local controls `controls' d_townsize`k'
}
forvalues k=2/4 {
	local controls `controls' d_educ`k'
}
forvalues k=2/7 {
	local controls `controls' d_estat`k'
}
forvalues k=2/11 {
	local controls `controls' d_HHincome`k'
}
local controls `controls' vaccineyesprior vaccineyesprior_missing
local belief_splits doctor_trust_underest_more doctor_takeup_underest_more

*vlna 25
summarize vaccinated vaccinate_intention `controls' `belief_splits' if vlna==25

*vlna 24+
summarize vaccinated vaccinate_intention `controls' `belief_splits' if vlna>=24

//Randomization check controls
orth_out `controls' if vlna==25, by(info_exp) replace pcompare count bdec(2)

//Randomization check Beliefs
foreach belief of varlist beliefs_trust_W25 beliefs_takeup_W25 {
	ttest `belief' if vlna==25, by(info_exp)
	ranksum `belief' if vlna==25, by(info_exp)
}

// Original note: control for beliefs about doctors' support, as there seems to be an imbalance

//Fixed sample - attrition check: participation in each later wave by treatment arm
forvalues w=26/36 {
	ttest vlna`w'_part if vlna==25, by(info_exp)
	tabulate vlna`w'_part info_exp if vlna==25, chi2

}
* Original note: looks good, no significant differences

save "$cleandatapath/communicating_consensus_clean.dta", replace

log close
